package com.webmagic.demo;

import com.alibaba.fastjson.JSON;
import com.webmagic.demo.Instrument.Instrument;
import lombok.extern.slf4j.Slf4j;
import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.processor.PageProcessor;

/**
 * WebMagic {@link PageProcessor} that reads one instrument product page,
 * copies selected fields into an {@link Instrument} via XPath queries, and
 * prints the result to standard output as JSON.
 *
 * <p>Every field is read with {@code Selectable.get()}; a field whose XPath
 * matches nothing is presumably left {@code null} (confirm against the
 * WebMagic version in use).
 *
 * @author zhanglu
 * @since 2018/4/15
 */
@Slf4j
public class MyProcessor implements PageProcessor {

    /** Container whose markup is dumped to standard output before the fields are extracted. */
    private static final String PRODUCT_BLOCK = "//div[@class=\"solid_gray\"]";

    /** Anchor inside the second {@code h3} of the {@code pro_R} container (supplier name and link). */
    private static final String SUPPLIER_ANCHOR = "//div[@class=\"pro_R\"]/h3[2]/a";

    /** Container whose anchors are read positionally for the categories and the product itself. */
    private static final String BREADCRUMB = "//div[@class=\"fontArial\"]";

    /** Table body holding brand, model, quote and place of origin. */
    private static final String INFO_TABLE = "//div[@class=\"info_con\"]/table/tbody";

    /** Container whose inner HTML is stored as the introduction. */
    private static final String INTRODUCTION = "//div[@class=\"IMShowApplyClass\"]/html()";

    /**
     * Crawl settings for the target site (encoding, crawl interval, retry count, ...).
     * Here: retry times 3, sleep time 100 (assumed to be milliseconds - confirm in the WebMagic docs).
     */
    private final Site site = Site.me().setRetryTimes(3).setSleepTime(100);

    /**
     * Returns the crawl settings used by this processor.
     *
     * @return the {@link Site} configured for this processor
     */
    @Override
    public Site getSite() {
        return site;
    }

    /**
     * Extracts the instrument fields from the downloaded page and prints them.
     *
     * <p>First prints the raw {@code solid_gray} block, then fills an
     * {@link Instrument} and prints its JSON form. Nothing is stored on the
     * page's result items.
     *
     * @param page the downloaded page to extract from
     */
    @Override
    public void process(Page page) {
        System.out.println(select(page, PRODUCT_BLOCK));

        Instrument item = new Instrument();

        // Supplier name and link.
        item.setSupplier(select(page, SUPPLIER_ANCHOR + "/text()"));
        item.setSupplierUrl(select(page, SUPPLIER_ANCHOR + "/@href"));

        // Anchors 3-5 are the three category levels; anchor 6 is the product itself.
        item.setCategoryOne(select(page, BREADCRUMB + "/a[3]/text()"));
        item.setCategoryTwo(select(page, BREADCRUMB + "/a[4]/text()"));
        item.setCategoryThree(select(page, BREADCRUMB + "/a[5]/text()"));
        item.setChineseName(select(page, BREADCRUMB + "/a[6]/text()"));
        item.setUrl(select(page, BREADCRUMB + "/a[6]/@href"));

        // Cells of the info table, addressed by row and column position.
        item.setBrand(select(page, INFO_TABLE + "/tr[1]/td[2]/span/a/text()"));
        item.setModel(select(page, INFO_TABLE + "/tr[1]/td[1]/span/text()"));
        item.setQuote(select(page, INFO_TABLE + "/tr[2]/td[2]/span/text()"));
        item.setOriginPlace(select(page, INFO_TABLE + "/tr[3]/td[2]/span/text()"));

        item.setIntroduction(select(page, INTRODUCTION));

        System.out.println(JSON.toJSON(item));
    }

    /**
     * Runs one XPath query against the page's HTML and returns the first result.
     *
     * @param page  the page to query
     * @param xpath the XPath expression to evaluate
     * @return the value returned by {@code get()} for the expression
     */
    private static String select(Page page, String xpath) {
        return page.getHtml().xpath(xpath).get();
    }

    /**
     * Starts a spider with this processor on a single hard-coded product URL,
     * using 5 threads, and blocks until {@code run()} returns.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        Spider.create(new MyProcessor())
                .addUrl("http://www.instrument.com.cn/netshow/C181743.htm")
                .thread(5)
                .run();
    }
}
